* Stacked bar chart: number of baseline-sample observations per decade,
* split by source dataset. Exported as a PDF.

clear all
set more off
set maxvar 10000

* Load the pooled analysis file and restrict it to the baseline sample.
use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear
keep if baseline_sample==1

* Show the frequency of each source dataset (log output only).
tab data

* Prefix the source identifier with an underscore; after the reshape below
* the count variables are therefore named pop_<source>.
replace data = "_"+data

* Count the observations in each (source, decade) cell and keep one row per cell.
bysort data decade: gen pop = _N
by data decade: keep if _n==1
keep data decade pop

* One row per decade, one pop* column per source dataset.
reshape wide pop , i(decade) j("data", string)

* The decade filter is applied after the reshape, so a pop* column exists for
* every source present in the baseline sample, whichever decades it covers.
drop if decade<1910 | decade>1970

* NOTE(review): the legend labels and bar colours are positional. They assume
* pop* expands to the 15 sources in exactly the order listed below -- confirm
* against the values of `data' if the set of sources changes.
graph bar (mean) pop*, over(decade) stack ///
    legend(on col(4) size(small) ///
        order(1 "ANES" 2 "AVTMH 57" 3 "AVTMH 76" 4 "GSS" 5 "NFS" ///
              6 "NLS-MW" 7 "NLS-OM" 8 "NLSY79" 9 "NLS-YM" 10 "NLS-YW" ///
              11 "NSBA" 12 "NSFH" 13 "OCG 62" 14 "OCG 73" 15 "PSID" )) ///
    ytitle("Number of observations" " ") ///
    bar(1, color(purple)) ///
    bar(2, color(orange*0.45)) ///
    bar(3, color(lime*0.5)) ///
    bar(4, color(midblue*0.75)) ///
    bar(5, color(maroon*1.25)) ///
    bar(6, color(red*0.75)) ///
    bar(7, color(black*0.9)) ///
    bar(8, color(gray)) ///
    bar(9, color(gold*1.05)) ///
    bar(10, color(pink*0.5)) ///
    bar(11, color(forest_green*0.5)) ///
    bar(12, color(forest_green*1.25)) ///
    bar(13, color(lavender*0.55)) ///
    bar(14, color(lavender*1.3)) ///
    bar(15, color(eltblue*0.7))

graph export "$Mydirectory2/appendix_e/Histogram_decades_baselinedata.pdf", as(pdf) replace 
    
    